---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(knitr)
P <- read_csv("../data/MarriagePanel.csv")
M <- read_csv("../data/MarriageFile.csv")
K <- read_csv("../data/KidPanelv2.csv")

```

# Facts and Moments from the PSID Data

## Sample Size

The number of marriages

```{r}
nrow(M)
```

The number of children:

```{r}
K %>% select(KID) %>% unique() %>% nrow()

```

## Marriage Sample Statistics

```{r}
# want: divorce rate. fertility rate. education men. education women. age at marriage.

t1 <- M %>%
  summarize(
    "Divorced by 2018" = mean(YDIV<9000),
    "Have Child" = mean(YBIRTH<9000),
    "College: Wife" = mean(edW),
    "College: Husband" = mean(edH),
    "Age at Marriage: Wife" = mean(YMAR - YBIRTH_M),
    "Age at Marriage: Husband" = mean(YMAR - YBIRTH_F),
    "Time to First Birth" = mean(YBIRTH[YBIRTH<9000] - YMAR[YBIRTH<9000]),
    "Mutual Consent law in year of marriage" = mean(YMAR<UD_year),
    "Sample Size" = as.integer(length(MID))
    ) %>%
  pivot_longer(cols=everything()) 
  
t2 <- P %>%
  summarize(
    "Average Wage: Wife" = mean(M_wage,na.rm = TRUE),
    "Average Wage: Husband" = mean(F_wage,na.rm = TRUE)
  ) %>%
  pivot_longer(cols=everything())

rbind(t1,t2) %>%
  kable()


```

```{r}
rbind(t1,t2) %>%
  kable("latex")
```


Age of children when divorce occurs.

```{r}
N = sum(M$YDIV<9000)
g <- M %>%
  filter(YDIV<9000) %>%
  mutate(ad = YDIV-YBIRTH) %>%
  mutate(AgeDiv = case_when(ad<0 ~ 0, (ad>=0 & ad<=5) ~ 1,(ad>5 & ad<=12) ~ 2,ad>=13 ~ 3)) %>%
  mutate(AgeDiv = factor(AgeDiv,levels=c(0,1,2,3),labels=c("Before any birth","0-5 years","6-12 years","13+ years"))) %>%
  group_by(AgeDiv) %>%
  summarise(frac = n() / N, se = sqrt(frac*(1-frac)/n())) %>%
  ggplot(aes(x=AgeDiv,y=frac,ymin=frac-1.96*se,ymax=frac+1.96*se)) + geom_point() + geom_errorbar(width=0.2) + geom_line() + theme_minimal() + ylab("Fraction") + xlab("Age of Oldest Child at Divorce")
ggsave("../output/figures/age_divorce.eps",g,width=4,height=3)
g
```



## Child Development Moments

We separate children into three groups (never divorced, will divorce, and divorced) and calculate average test scores. We'll do this at each age, then also standardized, before creating age groups also.

```{r}
K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  group_by(dgroup,AGE) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  ggplot(aes(x=AGE,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup)) + geom_line() + geom_point() + geom_errorbar(width=0.) + ggtitle("Average AP score by Marital Status (raw)")
```

```{r}
K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  group_by(AGE) %>%
  mutate(AP_raw = (AP_raw - mean(AP_raw))/sd(AP_raw)) %>%
  #mutate(dgroup=dgroup=="Divorced") %>%
  group_by(dgroup,AGE) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  ggplot(aes(x=AGE,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup)) + geom_line() + geom_point() + geom_errorbar(width=0.) + ggtitle("Average AP score by Marital Status (standardized)")
```

```{r}
g <- K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  ggplot(aes(x=age_group,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup,shape=dgroup)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(position=position_dodge(0.5),width=0.3) + theme_minimal() + xlab("Age Group") + ylab("Average Raw AP Score") + theme(legend.position = "bottom",legend.title = element_blank())

g + ggtitle("Average AP score by Marital Status (raw)")
ggsave("../output/figures/skill_patterns.eps",g,width=5,height=4)


```

```{r}
g <- K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  group_by(AGE) %>%
  mutate(AP_std = (AP_raw - mean(AP_raw))/sd(AP_raw)) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  #mutate(dgroup=dgroup=="Divorced") %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_std,na.rm = TRUE),se = sd(AP_std,na.rm = TRUE)/sqrt(n())) %>%
  ggplot(aes(x=age_group,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(position=position_dodge(0.5),width=0.3) 

g + ggtitle("Average AP score by Marital Status (standardized)")

```

```{r}
g <- K %>%
  filter(TSS>-6,TSS<7) %>%
  select(AGE,AP_raw,TSS) %>%
  drop_na() %>%
  group_by(AGE) %>%
  mutate(AP_std = (AP_raw - mean(AP_raw))/sd(AP_raw)) %>%
  group_by(TSS) %>%
  summarize(m = mean(AP_std,na.rm = TRUE),se = sd(AP_std,na.rm = TRUE)/sqrt(n())) %>%
  ggplot(aes(x=TSS,y=m,ymin=m-1.96*se,ymax=m+1.96*se)) + geom_line() + geom_errorbar()

g + ggtitle("Average AP score by Time since Divorce (standardized)")
```


### Comparison Using Separation instead of Divorce

```{r}


m1 <- K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  group_by(AGE) %>%
  mutate(AP_raw = (AP_raw - mean(AP_raw))/sd(AP_raw)) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  mutate(case="Using Divorce")

m2 <- K %>%
  select(AGE,AP_raw,sgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(sgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  group_by(AGE) %>%
  mutate(AP_raw = (AP_raw - mean(AP_raw))/sd(AP_raw)) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  mutate(case="Using Separation")


g <- rbind(m1,m2) %>%
  ggplot(aes(x=age_group,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(position=position_dodge(0.5),width=0.3) + theme_minimal() + xlab("Age Group") + ylab("Average Raw AP Score") + theme(legend.position = "bottom",legend.title = element_blank()) + facet_grid(. ~ case)

g

```


```{r}
m1 <- K %>%
  select(AGE,AP_raw,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  mutate(case="Using Divorce")

m2 <- K %>%
  select(AGE,AP_raw,sgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(sgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(AP_raw),se = sd(AP_raw)/sqrt(n())) %>%
  mutate(case="Using Separation")


g <- rbind(m1,m2) %>%
  ggplot(aes(x=age_group,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup,shape=dgroup)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(position=position_dodge(0.5),width=0.3) + theme_minimal() + xlab("Age Group") + ylab("Average Raw AP Score") + theme(legend.position = "bottom",legend.title = element_blank()) + facet_grid(. ~ case)

ggsave("../output/figures/skill_patterns_separation.eps",g,width=6,height=4)
g

```

## Time Investment Patterns

### Basic patterns by age
```{r}
m1 <- K %>%
  select(AGE,tau_m,DIV) %>%
  drop_na() %>%
  group_by(DIV,AGE) %>%
  summarize(m = mean(tau_m),se = sd(tau_m)/sqrt(n())) %>%
  mutate(var = "Mother's Time")

m2 <- K %>%
  select(AGE,tau_f,DIV) %>%
  drop_na() %>%
  group_by(DIV,AGE) %>%
  summarize(m = mean(tau_f),se = sd(tau_f)/sqrt(n())) %>%
  mutate(var = "Father's Time")

rbind(m1,m2) %>%
  ggplot(aes(x=AGE,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=DIV)) + geom_line() + facet_grid(. ~ var)

```
### Patterns by age and by finer group (to show some degree of selection)

```{r}
m1 <- K %>%
  select(AGE,tau_m,dgroup) %>%
  drop_na() %>%
  group_by(dgroup,AGE) %>%
  summarize(m = mean(tau_m),se = sd(tau_m)/sqrt(n())) %>%
  mutate(var = "Mother's Time")

m2 <- K %>%
  select(AGE,tau_f,dgroup) %>%
  drop_na() %>%
  group_by(dgroup,AGE) %>%
  summarize(m = mean(tau_f),se = sd(tau_f)/sqrt(n())) %>%
  mutate(var = "Father's Time")

rbind(m1,m2) %>%
  ggplot(aes(x=AGE,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=as.factor(dgroup))) + geom_line() + facet_grid(. ~ var) + geom_errorbar(width=0)
```

## Patterns by age group and by divorce group

```{r}
m1 <- K %>%
  select(AGE,tau_m,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(tau_m),se = sd(tau_m)/sqrt(n())) %>%
  mutate(var="Mother's Time")

m2 <- K %>%
  select(AGE,tau_f,dgroup) %>%
  drop_na() %>%
  mutate(dgroup = factor(dgroup,labels=c("Never Divorced","Divorced","Will Divorce"),levels=c(1,2,3))) %>%
  mutate(age_group = case_when(AGE<=5 ~ 1,AGE>=14 ~ 4,AGE>=10 ~ 3,AGE>=6 ~ 2)) %>%
  mutate(age_group = factor(age_group,labels=c("0-5","6-9","10-13","14+"),levels=c(1,2,3,4))) %>%
  group_by(dgroup,age_group) %>%
  summarize(m = mean(tau_f),se = sd(tau_f)/sqrt(n())) %>%
  mutate(var="Father's Time")

  
g <- rbind(m1,m2) %>%
  ggplot(aes(x=age_group,y=m,ymin=m-1.96*se,ymax=m+1.96*se,color=dgroup,shape=dgroup)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(position=position_dodge(0.5),width=0.3) + facet_grid(. ~ var) + theme_minimal() + xlab("Age") + ylab("Hours / week") + theme(legend.position = "bottom",legend.title = element_blank())

g + ggtitle("Average Time Investment by Marital Status")

ggsave("../output/figures/time_patterns.eps",g,width=5.5,height=4)


```


## Some Regressions

### AP scores on divorce with fixed effects

```{r}
library(fixest)

mod1 <- K %>%
  group_by(AGE) %>%
  mutate(AP_std2 = (AP_raw - mean(AP_raw,na.rm = TRUE))/sd(AP_raw,na.rm=TRUE)) %>%
  mutate(div_exp = pmin(pmax(0,TSS),10)) %>%
  feols(AP_std2 ~ SEP + div_exp | MID + AGE, data=.) 

mod2 <- K %>%
  group_by(AGE) %>%
  mutate(AP_std2 = (AP_raw - mean(AP_raw,na.rm = TRUE))/sd(AP_raw,na.rm=TRUE)) %>%
  mutate(div_exp = pmin(pmax(0,TSD),10)) %>%
  feols(AP_std2 ~ DIV + div_exp | MID + AGE, data=.) 

mod3 <- K %>%
  group_by(AGE) %>%
  mutate(AP_std2 = (AP_raw - mean(AP_raw,na.rm = TRUE))/sd(AP_raw,na.rm=TRUE)) %>%
  mutate(div_exp = pmin(pmax(0,TSS),10)) %>%
  feols(AP_std2 ~ SEP + div_exp | KID + AGE, data=.) 

mod4 <- K %>%
  group_by(AGE) %>%
  mutate(AP_std2 = (AP_raw - mean(AP_raw,na.rm = TRUE))/sd(AP_raw,na.rm=TRUE)) %>%
  mutate(div_exp = pmin(pmax(0,TSD),10)) %>%
  feols(AP_std2 ~ DIV + div_exp | KID + AGE, data=.)

```


```{r}
etable(mod1,mod2,mod3,mod4)
```

```{r}
etable(mod1,mod2,mod3,mod4,tex = TRUE)

```


### Time Investment

For fathers, this is the regression that recovers the reduction:
```{r}

mod1 <- K %>%
  select(KID,tau_f,AGE,DIV) %>%
  drop_na() %>%
  group_by(AGE) %>%
  mutate(gamma_a = mean(tau_f[!DIV])) %>%
  feols(tau_f ~ gamma_a + (gamma_a:as.integer(DIV)) | KID,data=.)

mod2 <- K %>%
  select(MID,KID,tau_f,AGE,DIV) %>%
  drop_na() %>%
  group_by(AGE) %>%
  mutate(gamma_a = mean(tau_f[!DIV])) %>%
  feols(tau_f ~ gamma_a + (gamma_a:as.integer(DIV)) | MID,data=.)


```
The same for mothers:

```{r}
mod3 <- K %>%
  select(KID,tau_m,AGE,DIV) %>%
  drop_na() %>%
  group_by(AGE) %>%
  mutate(gamma_a = mean(tau_m[!DIV])) %>%
  feols(tau_m ~ gamma_a + (gamma_a:as.integer(DIV)) | KID,data=.)

mod4 <- K %>%
  select(MID,KID,tau_m,AGE,DIV) %>%
  drop_na() %>%
  group_by(AGE) %>%
  mutate(gamma_a = mean(tau_m[!DIV])) %>%
  feols(tau_m ~ gamma_a + (gamma_a:as.integer(DIV)) | MID,data=.)

```

```{r}
etable(mod1,mod2,mod3,mod4)
```

```{r}
etable(mod1,mod2,mod3,mod4,tex=TRUE)
```

## Custody Allocation Moments from the CPS

```{r}
# read in the data
library(vroom)
D <- vroom("../data/data-cps/cps_00038.csv") %>%
  filter(CSELIG==1,CSYRDIV<9998,CSDAYS<998,CSLEGCUS<98) %>%
  mutate(coll = EDUC>100,days = case_when(SEX==1 ~ 365-CSDAYS,SEX==2 ~ CSDAYS),legal=CSPHYCUS==2 | CSVISLEG==2) %>%
  rename(year_div = CSYRDIV) %>%
  filter(!is.na(days)) %>%
  mutate(phi = cut(days / 365, 5))

D %>% 
  group_by(phi) %>% 
  summarize(count=n()) %>% 
  ungroup() %>%
  mutate(p = count/sum(count)) #%>%
  write.csv("../data/CustodyMomentsSimple.csv")


```